"""
Scrape listed companies' email addresses, board secretaries' names, and board secretaries' email addresses.
"""
import requests
from bs4 import BeautifulSoup
import openpyxl

def readStockCode():
    """Read stock codes from the first column of ``stockCode.xlsx``.

    Values are taken from column 1 of the worksheet named ``sheet1``,
    starting at row 2 (row 1 is presumably a header -- confirm against the
    workbook). Each value is split on ``'.'`` and only the part before the
    first dot is kept.

    Returns:
        list[str]: One code per non-empty cell, in sheet order.
    """
    wb = openpyxl.load_workbook('stockCode.xlsx')
    ws = wb['sheet1']
    code = []
    # Iterate over the rows that actually exist instead of a hard-coded row
    # count, so a shorter sheet does not crash and a longer one is not
    # silently truncated.
    for (stockCode,) in ws.iter_rows(min_row=2, max_col=1, values_only=True):
        if stockCode is None:
            # Blank cell (e.g. trailing empty rows): nothing to split.
            continue
        # str() guards against cells that Excel stored as numbers.
        text = str(stockCode).strip()
        if not text:
            continue
        code.append(text.split('.')[0])
    return code

# URL prefix on quotes.money.163.com; main() appends a stock code followed by
# '.html#01f01' to build the page address for each company.
DOWNLOAD_URL = 'http://quotes.money.163.com/f10/gszl_'

def download_page(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block forever.

    Returns:
        bytes: The undecoded response body.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP 4xx/5xx status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on error statuses rather than handing an error page to the
    # HTML parser.
    response.raise_for_status()
    return response.content

def parse_html(html):
    """Extract three text fields from a downloaded detail page.

    The page is searched for the table whose class attribute is
    ``'table_bg001 border_box limit_sale table_details'``; the text of the
    fourth ``<td>`` in table rows 3, 5 and 8 (0-based) is returned.

    Args:
        html: Page markup as ``bytes`` or ``str``.

    Returns:
        tuple[str, str, str]: ``(email1, name, email2)`` -- per the module
        docstring these are the company email, the board secretary's name
        and the board secretary's email.

    Raises:
        ValueError: If the table, or one of the expected rows/cells, is
            missing from the page.
    """
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed (and to avoid bs4's warning).
    soup = BeautifulSoup(html, 'html.parser')
    detail_table_soup = soup.find('table', attrs={'class': 'table_bg001 border_box limit_sale table_details'})
    if detail_table_soup is None:
        raise ValueError('detail table not found in page')

    # Collect the rows once instead of re-scanning the table for every field.
    rows = detail_table_soup.find_all('tr')

    def cell_text(row_index):
        # Text of the fourth cell in the given row.
        try:
            return rows[row_index].find_all('td')[3].getText()
        except IndexError:
            raise ValueError(
                'detail table has no cell at row %d, column 3' % row_index
            ) from None

    email1_content = cell_text(3)
    name_content = cell_text(5)
    email2_content = cell_text(8)

    return email1_content, name_content, email2_content

def write2xlsm(code, list1, list2, list3):
    """Write four parallel sequences into ``result.xlsx``.

    A new workbook is created and a sheet named ``sheet1`` is inserted at
    index 0 (the workbook's automatically created default sheet is left in
    place after it). Row ``n`` receives ``code[n-1]``, ``list1[n-1]``,
    ``list2[n-1]`` and ``list3[n-1]`` in columns 1-4.

    Args:
        code: Values for column 1; its length determines the row count.
        list1: Values for column 2.
        list2: Values for column 3.
        list3: Values for column 4.
    """
    book = openpyxl.Workbook()
    sheet = book.create_sheet('sheet1', 0)
    columns = (code, list1, list2, list3)
    for row in range(1, len(code) + 1):
        # Fill the row left to right, one source sequence per column.
        for col, values in enumerate(columns, start=1):
            sheet.cell(row, col).value = values[row - 1]
    book.save('result.xlsx')

def main():
    """Crawl the detail page of every stock code and save the results.

    Codes come from readStockCode(); for each one the page at
    ``DOWNLOAD_URL + code + '.html#01f01'`` is downloaded and parsed, and
    the three extracted fields are collected. Everything is written with
    write2xlsm() at the end.

    A failure for one stock (network error or unexpected page layout) is
    reported and recorded as blank fields, so a single bad page does not
    abort the run and discard the results already gathered.
    """
    code = readStockCode()
    email1_list = []
    name_list = []
    email2_list = []
    for stock in code:
        url = DOWNLOAD_URL + stock + '.html#01f01'
        try:
            html = download_page(url)
            email1, name, email2 = parse_html(html)
        except (requests.RequestException, AttributeError, IndexError, ValueError) as err:
            # AttributeError/IndexError/ValueError cover pages where the
            # expected table, row or cell is absent.
            print("failed:", stock, err)
            email1, name, email2 = '', '', ''
        else:
            print("complete:", stock)

        email1_list.append(email1)
        name_list.append(name)
        email2_list.append(email2)
    write2xlsm(code, email1_list, name_list, email2_list)

# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()